from pyspark import SparkConf, SparkContext
import os

if __name__ == '__main__':
    # Point PySpark at the local Python interpreter (Windows-specific path)
    os.environ["PYSPARK_PYTHON"] = "D:/Python/Python310/python.exe"
    # Run Spark locally, using all available CPU cores
    conf = SparkConf().setAppName("reduceByKey demo").setMaster("local[*]")
    sc = SparkContext(conf=conf)
    # Build an RDD of (key, value) pairs
    rdd = sc.parallelize([('a', 1), ('b', 1), ('a', 2), ('b', 2), ('a', 3), ('b', 40)])
    # Sum the values for each key: pairs sharing a key are combined pairwise
    result = rdd.reduceByKey(lambda a, b: a + b)
    print(result.collect())
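    # Expected output (key order may vary across runs): [('a', 6), ('b', 43)]
    # Release cluster resources once the job is done
    sc.stop()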
